{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup a classification experiment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.datasets import load_boston\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "df = pd.read_csv(\n",
    "    \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n",
    "    header=None)\n",
    "df.columns = [\n",
    "    \"Age\", \"WorkClass\", \"fnlwgt\", \"Education\", \"EducationNum\",\n",
    "    \"MaritalStatus\", \"Occupation\", \"Relationship\", \"Race\", \"Gender\",\n",
    "    \"CapitalGain\", \"CapitalLoss\", \"HoursPerWeek\", \"NativeCountry\", \"Income\"\n",
    "]\n",
    "# df = df.sample(frac=0.01, random_state=1)\n",
    "train_cols = df.columns[0:-1]\n",
    "label = df.columns[-1]\n",
    "X = df[train_cols]\n",
    "y = df[label].apply(lambda x: 0 if x == \" <=50K\" else 1) #Turning response into 0 and 1\n",
    "\n",
    "# We have to transform categorical variables to use sklearn models\n",
    "X_enc = pd.get_dummies(X, prefix_sep='.')\n",
    "feature_names = list(X_enc.columns)\n",
    "\n",
    "seed = 1  \n",
    "X_train, X_test, y_train, y_test = train_test_split(X_enc, y, test_size=0.20, random_state=seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train a blackbox classification system"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.pipeline import Pipeline\n",
    "\n",
    "#Blackbox system can include preprocessing, not just a classifier!\n",
    "pca = PCA()\n",
    "rf = RandomForestClassifier(n_estimators=100, n_jobs=-1)\n",
    "\n",
    "blackbox_model = Pipeline([('pca', pca), ('rf', rf)])\n",
    "blackbox_model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Show blackbox model performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "from interpret import show\n",
    "from interpret.perf import ROC\n",
    "\n",
    "blackbox_perf = ROC(blackbox_model.predict_proba).explain_perf(X_test, y_test, name='Blackbox')\n",
    "show(blackbox_perf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Local Explanations: How an individual prediction was made"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "from interpret.blackbox import LimeTabular\n",
    "from interpret import show\n",
    "\n",
    "#Blackbox explainers need a predict function, and optionally a dataset\n",
    "lime = LimeTabular(predict_fn=blackbox_model.predict_proba, data=X_train, random_state=1)\n",
    "\n",
    "#Pick the instances to explain, optionally pass in labels if you have them\n",
    "lime_local = lime.explain_local(X_test[:5], y_test[:5], name='LIME')\n",
    "\n",
    "show(lime_local)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret.blackbox import ShapKernel\n",
    "import numpy as np\n",
    "\n",
    "background_val = np.median(X_train, axis=0).reshape(1, -1)\n",
    "shap = ShapKernel(predict_fn=blackbox_model.predict_proba, data=background_val, feature_names=feature_names)\n",
    "shap_local = shap.explain_local(X_test[:5], y_test[:5], name='SHAP')\n",
    "show(shap_local)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Global Explanations: How the model behaves overall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "from interpret.blackbox import MorrisSensitivity\n",
    "\n",
    "sensitivity = MorrisSensitivity(predict_fn=blackbox_model.predict_proba, data=X_train)\n",
    "sensitivity_global = sensitivity.explain_global(name=\"Global Sensitivity\")\n",
    "\n",
    "show(sensitivity_global)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret.blackbox import PartialDependence\n",
    "\n",
    "pdp = PartialDependence(predict_fn=blackbox_model.predict_proba, data=X_train)\n",
    "pdp_global = pdp.explain_global(name='Partial Dependence')\n",
    "\n",
    "show(pdp_global)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compare them all in the Dashboard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "show([blackbox_perf, lime_local, shap_local, sensitivity_global, pdp_global])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}